Dependencies

Install the following libraries before running the TDA pipeline.

#install.packages(c("TDA", "Matrix"))

Load libraries

library("Matrix") #package for sparse matrices
library("TDA")

Set working directory

working_dir_path <- setwd("~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline")

Load necessary functions

source("utilities.R")

Create a folder with a subfolder for each data group for storing saved tda computations

You only need to run this block once

dir.create("saved_computations", showWarnings = FALSE)
saved_computations <- concat_path(working_dir_path, "saved_computations")

#choose names for two data groups
groups <- list("group_0", "group_25")

for (i in 1:length(groups)){
  dir.create(concat_path(saved_computations,groups[i]),  showWarnings = FALSE)
}

Create a single csv file for storing necessary parameters (max birth and max death) obtained from all data groups

You only need to run this block once; after running it once comment it out

#max_values <- data.frame(matrix(ncol = 2, nrow = 0))
#colnames(max_values) <- c("maxbirth", "maxdeath")  

#write.csv(max_values, concat_path(saved_computations,"max-values.csv"), row.names=FALSE)

I. Are you using our cell type identification method?

If you are using different cell type identification method, have separate csv files for cells of distinct types

#set 'yes' if you use our cell type identification method;
#set 'no' if you use different cell type identification method; in this case, have separate csv files for cells of distinct types

our_cell_type_identification <- "yes"

if (our_cell_type_identification == "yes"){
  #specify location path of the file with the cell type codes and their colors
  cell_types_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Identification/cell_colors.csv"

  #read all cell type codes from your designated file
  cell_types_file <- read.csv(cell_types_path)
  all_cell_types <- cell_types_file$Code

  # indicate cell types for only including specific cells 
  cell_types <- all_cell_types[2]   # this can either be a single value or a vector

  index <- NaN    # default index for cell types is the last column

  sprintf("Analysis will be done for cells of type: %s", paste(cell_types, collapse=", "))
}
## [1] "Analysis will be done for cells of type: 1"
#this will include all cells
if (our_cell_type_identification == "no"){
  print("You are using a different cell type identification method. Analysis will be done for all cells.")
  cell_types <- NaN 
  index <- NaN
}

II. Choose a data group and its location

#choose a group folder where to save computations
group <- "group_25"

# specify location path of the csv files of that group
csv_dir_path <- "~/Documents/GitHub/TDA-Microscopy-Pipeline/Discretized-images/25"

III. Compute persistence diagrams and save them

We use Delaunay complex filtration to compute persistence diagrams

compute_PDs(csv_dir_path, group, saved_computations, cell_types, index)
## [1] "Processing file 25_dox_1.csv"
## [1] "Processing file 25_dox_10.csv"
## [1] "Processing file 25_dox_11.csv"
## [1] "Processing file 25_dox_12.csv"
## [1] "Processing file 25_dox_13.csv"
## [1] "Processing file 25_dox_14.csv"
## [1] "Processing file 25_dox_15.csv"
## [1] "Processing file 25_dox_16.csv"
## [1] "Processing file 25_dox_2.csv"
## [1] "Processing file 25_dox_3.csv"
## [1] "Processing file 25_dox_4.csv"
## [1] "Processing file 25_dox_5.csv"
## [1] "Processing file 25_dox_6.csv"
## [1] "Processing file 25_dox_7.csv"
## [1] "Processing file 25_dox_8.csv"
## [1] "Processing file 25_dox_9.csv"
## [1] "Persistence diagrams are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_PDs.RData"

IV. Determine the max birth and max death among all data files

This is crucial for VII block and will ensure that all plots from both groups have the same scale

#Before running this block, compute persistence diagrams for all data groups (i.e choose another data group and specify its location in II-block, and compute its persistence diagrams in III-block) 

max_values <- read.csv(concat_path(saved_computations, "max-values.csv"))

max_birth <- max(max_values$maxbirth)
max_death <- max(max_values$maxdeath)

Plot persistence diagrams

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_PDs(group, max_birth, max_death, saved_computations, save_plot)
## [1] "Persistence diagrams plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"

Plot representative cycles that persist (live) over a certain threshold

#choose a persistence threshold
persist_threshold <- 20 

save_plot <- TRUE #set FALSE if you don't want to save plots

#plot representative cycles with persistence above the chosen threshold
plot_representative_cycles(csv_dir_path, cell_types, persist_threshold, group, saved_computations, save_plot)
## [1] "Processing file 25_dox_1.csv"
## # Generated complex of size: 7557

## [1] "Processing file 25_dox_10.csv"
## # Generated complex of size: 5079

## [1] "Processing file 25_dox_11.csv"
## # Generated complex of size: 5561

## [1] "Processing file 25_dox_12.csv"
## # Generated complex of size: 7909

## [1] "Processing file 25_dox_13.csv"
## # Generated complex of size: 1883

## [1] "Processing file 25_dox_14.csv"
## # Generated complex of size: 2323

## [1] "Processing file 25_dox_15.csv"
## # Generated complex of size: 2851

## [1] "Processing file 25_dox_16.csv"
## # Generated complex of size: 4557

## [1] "Processing file 25_dox_2.csv"
## # Generated complex of size: 11789

## [1] "Processing file 25_dox_3.csv"
## # Generated complex of size: 12539

## [1] "Processing file 25_dox_4.csv"
## # Generated complex of size: 12547

## [1] "Processing file 25_dox_5.csv"
## # Generated complex of size: 4931

## [1] "Processing file 25_dox_6.csv"
## # Generated complex of size: 7313

## [1] "Processing file 25_dox_7.csv"
## # Generated complex of size: 8131

## [1] "Processing file 25_dox_8.csv"
## # Generated complex of size: 10765

## [1] "Processing file 25_dox_9.csv"
## # Generated complex of size: 3591

## [1] "Representative cycle are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"

V. Compute persistence landscapes and save them

You need to have persistence diagrams computed first

#choose a discretization step (use the same for all groups)
discr_step <- 0.3

save_computations_csv <- FALSE #set TRUE if additionally to saving persistence landscapes in RData format you want to save each of them in a separate csv file. This option is provided if, for example, you plan to use persistence landscapes for further analysis in another programming language. Note that in a csv file row k corresponds to a persistence landscape function at depth k, where depth 1 corresponds to a outermost persistence landscape function, and column names correspond to x-values. 

compute_PLs(group, max_birth, max_death, discr_step, saved_computations, save_computations_csv)
## [1] "Persistence landscapes are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_PLs.RData"

Plot persistence landscapes

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_PLs(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Persistence landscape plots are saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25"

VI. Compute the average persistence landscape and save it

You need to have persistence landscapes computed first

compute_avgPL(group, max_birth, max_death, discr_step, saved_computations)
## [1] "Average persistence landscape is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25/group_25_avgPL.RData"

Plot the average persistence landscape

save_plot <- TRUE #set FALSE if you don't want to save plots

plot_avgPL(group, max_birth, max_death, discr_step, saved_computations, save_plot)
## [1] "Average persistence landscape plot is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations/group_25"

At this point, you need to have computed persistence diagrams, persistence landscapes, and average persistence landscapes for BOTH groups


VII. Plot the difference of average persistence landscapes of two data groups

You need to have computed average persistence landscapes for both groups (make sure that they were plotted on the same scale)

save_plot <- TRUE #set FALSE if you don't want to save plots

#avgPL of groups[0] minus avgPL of groups[1]
plot_avgPLs_difference(groups, max_birth, max_death, discr_step, saved_computations, save_plot) 
## [1] "Plot of the difference of two average persistence landscapes is saved in ~/Documents/GitHub/TDA-Microscopy-Pipeline/TDA/general-pipeline/saved_computations"

VIII. Perform a permutation test on persistence landscapes from two data groups

You need to have computed persistence landscapes for both groups

#number of permutations
num_repeats <- 10000

permutation_test_for_PLs(groups, saved_computations, num_repeats)
## [1] "p-value 0.0007"